From e8f64f24c7197b545878f78329e974548b66606f Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Fri, 1 Apr 2005 13:17:52 +0000 Subject: [PATCH] bitkeeper revision 1.1236.1.184 (424d4a00y8MNt89B4nCZ8LKcrTcZUw) Fix multi-VCPU TLB shootdown interface -- specify pointer to VCPU bitmap, so it is read at time of flush and not before (which might be too early, before all updates are flushed, leading to races). Also add selective multi-VCPU shootdown capability to update_va_mapping() and use this to make ptep_set_access_flags() a single hypercall. Signed-off-by: Keir Fraser --- .../i386-xen/i386-xen/xen_machdep.c | 2 +- .../i386-xen/xen/netfront/xn_netfront.c | 2 +- linux-2.4.29-xen-sparse/mm/memory.c | 2 +- .../arch/xen/i386/mm/hypervisor.c | 4 +- .../drivers/xen/blkback/blkback.c | 2 +- .../drivers/xen/netback/netback.c | 4 +- .../drivers/xen/netfront/netfront.c | 2 +- .../drivers/xen/usbback/usbback.c | 2 +- .../include/asm-xen/asm-i386/pgtable.h | 3 +- .../sys/arch/xen/xen/if_xennet.c | 6 +- xen/arch/x86/mm.c | 88 +++++++++++++------ xen/include/public/xen.h | 20 +++-- 12 files changed, 87 insertions(+), 50 deletions(-) diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c index 4fa020f531..1dcd9448d3 100644 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c @@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma) MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping; MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va; MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma; - MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL; + MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG|UVMF_LOCAL; mcl_increment_idx(); } diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c index 40d9e4636e..1de71545fb 100644 --- 
a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c @@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc) PT_UPDATES_FLUSH(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL; + xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; /* Give away a batch of pages. */ xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op; diff --git a/linux-2.4.29-xen-sparse/mm/memory.c b/linux-2.4.29-xen-sparse/mm/memory.c index 883a2928ab..875e5745c4 100644 --- a/linux-2.4.29-xen-sparse/mm/memory.c +++ b/linux-2.4.29-xen-sparse/mm/memory.c @@ -911,7 +911,7 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr { #ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL); + HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL); } else { set_pte(page_table, entry); flush_tlb_page(vma, address); diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c index 82493c1236..d34fd71f8e 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -108,7 +108,7 @@ void xen_tlb_flush_mask(cpumask_t mask) { struct mmuext_op op; op.cmd = MMUEXT_TLB_FLUSH_MULTI; - op.cpuset = mask.bits[0]; + op.cpuset = (unsigned long)mask.bits; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } @@ -124,7 +124,7 @@ void xen_invlpg_mask(cpumask_t mask, unsigned long ptr) { struct mmuext_op op; op.cmd = MMUEXT_INVLPG_MULTI; - op.cpuset = mask.bits[0]; + op.cpuset = (unsigned long)mask.bits; op.linear_addr = ptr & PAGE_MASK; BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c index 
4c3c8a5d06..a827ab4eda 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c @@ -111,7 +111,7 @@ static void fast_flush_area(int idx, int nr_pages) mcl[i].args[2] = 0; } - mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL; + mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c index 9079ea2d47..d08c296a02 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c @@ -270,7 +270,7 @@ static void net_rx_action(unsigned long unused) mcl->args[3] = DOMID_SELF; mcl++; - mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL; + mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) ) BUG(); @@ -429,7 +429,7 @@ static void net_tx_action(unsigned long unused) mcl++; } - mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL; + mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) ) BUG(); diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c index 0ac6747900..f729295021 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c @@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) } /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL; + rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. 
*/ rx_mcl[i].op = __HYPERVISOR_dom_mem_op; diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c index 72a6be3a2f..42439405cd 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c @@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int nr_pages) mcl[i].args[2] = 0; } - mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL; + mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h index 5333fde72b..5afb8ced1f 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h @@ -407,8 +407,7 @@ extern void noexec_setup(const char *str); do { \ if (__dirty) { \ if ( likely((__vma)->vm_mm == current->mm) ) { \ - HYPERVISOR_update_va_mapping((__address), (__entry), 0); \ - flush_tlb_page((__vma), (__address)); \ + HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \ } else { \ xen_l1_entry_update((__ptep), (__entry).pte_low); \ flush_tlb_page((__vma), (__address)); \ diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c index 687b21080b..2ffe6da2a0 100644 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c @@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id) xpq_flush_queue(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL; + rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; /* Give away a batch of pages. 
*/ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; @@ -681,7 +681,7 @@ xen_network_handler(void *arg) mcl->op = __HYPERVISOR_update_va_mapping; mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va; mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW; - mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0; + mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0; mcl++; xpmap_phys_to_machine_mapping @@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc) xpq_flush_queue(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL; + rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; /* Give away a batch of pages. */ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 845fbc9f66..d1eb5650b8 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1329,6 +1329,25 @@ static int set_foreigndom(unsigned int cpu, domid_t domid) return okay; } +static inline unsigned long vcpuset_to_pcpuset( + struct domain *d, unsigned long vset) +{ + unsigned int vcpu; + unsigned long pset = 0; + struct exec_domain *ed; + + while ( vset != 0 ) + { + vcpu = find_first_set_bit(vset); + vset &= ~(1UL << vcpu); + if ( (vcpu < MAX_VIRT_CPUS) && + ((ed = d->exec_domain[vcpu]) != NULL) ) + pset |= 1UL << ed->processor; + } + + return pset; +} + int do_mmuext_op( struct mmuext_op *uops, unsigned int count, @@ -1478,19 +1497,17 @@ int do_mmuext_op( case MMUEXT_TLB_FLUSH_MULTI: case MMUEXT_INVLPG_MULTI: { - unsigned long inset = op.cpuset, outset = 0; - while ( inset != 0 ) + unsigned long vset, pset; + if ( unlikely(get_user(vset, (unsigned long *)op.cpuset)) ) { - unsigned int vcpu = find_first_set_bit(inset); - inset &= ~(1UL<<vcpu); - if ( (vcpu < MAX_VIRT_CPUS) && - ((ed = d->exec_domain[vcpu]) != NULL) ) - outset |= 1UL << ed->processor; + okay = 0; + break; } + pset = vcpuset_to_pcpuset(d, vset); if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) - flush_tlb_mask(outset & d->cpuset); + flush_tlb_mask(pset & d->cpuset); else - 
flush_tlb_one_mask(outset & d->cpuset, op.linear_addr); + flush_tlb_one_mask(pset & d->cpuset, op.linear_addr); break; } @@ -1999,6 +2016,7 @@ int do_update_va_mapping(unsigned long va, struct exec_domain *ed = current; struct domain *d = ed->domain; unsigned int cpu = ed->processor; + unsigned long vset, pset, bmap_ptr; int rc = 0; perfc_incrc(calls_to_update_va); @@ -2013,11 +2031,6 @@ int do_update_va_mapping(unsigned long va, cleanup_writable_pagetable(d); - /* - * XXX When we make this support 4MB superpages we should also deal with - * the case of updating L2 entries. - */ - if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)], mk_l1_pgentry(val))) ) rc = -EINVAL; @@ -2025,21 +2038,42 @@ int do_update_va_mapping(unsigned long va, if ( unlikely(shadow_mode_enabled(d)) ) update_shadow_va_mapping(va, val, ed, d); - switch ( flags & UVMF_FLUSH_MASK ) + switch ( flags & UVMF_FLUSHTYPE_MASK ) { - case UVMF_TLB_FLUSH_LOCAL: - local_flush_tlb(); - percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; - break; - case UVMF_TLB_FLUSH_ALL: - flush_tlb_mask(d->cpuset); - percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; - break; - case UVMF_INVLPG_LOCAL: - local_flush_tlb_one(va); + case UVMF_TLB_FLUSH: + switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) + { + case UVMF_LOCAL: + local_flush_tlb(); + break; + case UVMF_ALL: + flush_tlb_mask(d->cpuset); + break; + default: + if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) ) + rc = -EFAULT; + pset = vcpuset_to_pcpuset(d, vset); + flush_tlb_mask(pset & d->cpuset); + break; + } break; - case UVMF_INVLPG_ALL: - flush_tlb_one_mask(d->cpuset, va); + + case UVMF_INVLPG: + switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) + { + case UVMF_LOCAL: + local_flush_tlb_one(va); + break; + case UVMF_ALL: + flush_tlb_one_mask(d->cpuset, va); + break; + default: + if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) ) + rc = -EFAULT; + pset = vcpuset_to_pcpuset(d, vset); + flush_tlb_one_mask(pset & d->cpuset, va); 
+ break; + } break; } diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 0ec17675c2..43a2e87e02 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -135,11 +135,11 @@ * linear_addr: Linear address to be flushed from the local TLB. * * cmd: MMUEXT_TLB_FLUSH_MULTI - * cpuset: Set of VCPUs to be flushed. + * cpuset: Pointer to bitmap of VCPUs to be flushed. * * cmd: MMUEXT_INVLPG_MULTI * linear_addr: Linear address to be flushed. - * cpuset: Set of VCPUs to be flushed. + * cpuset: Pointer to bitmap of VCPUs to be flushed. * * cmd: MMUEXT_TLB_FLUSH_ALL * No additional arguments. Flushes all VCPUs' TLBs. @@ -188,17 +188,21 @@ struct mmuext_op { /* SET_LDT */ unsigned int nr_ents; /* TLB_FLUSH_MULTI, INVLPG_MULTI */ - unsigned long cpuset; + void *cpuset; }; }; #endif /* These are passed as 'flags' to update_va_mapping. They can be ORed. */ -#define UVMF_TLB_FLUSH_LOCAL 1 /* Flush local CPU's TLB. */ -#define UVMF_INVLPG_LOCAL 2 /* Flush VA from local CPU's TLB. */ -#define UVMF_TLB_FLUSH_ALL 3 /* Flush all TLBs. */ -#define UVMF_INVLPG_ALL 4 /* Flush VA from all TLBs. */ -#define UVMF_FLUSH_MASK 7 +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +#define UVMF_NONE (0UL) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<1) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ /* * Commands to HYPERVISOR_sched_op(). -- 2.30.2